Manuscript title: Remdesivir prophylaxis but not later treatment limits measles-induced “immune amnesia” and measles antibody responses in macaques

Abstract: Despite the availability of a safe and effective vaccine, there is still no licensed antiviral therapy available for measles. We therefore evaluated the efficacy of a broad-spectrum antiviral, remdesevir, in wild-type measles virus (WT MeV)-infected RMs, and examines plasma antibody reactivity to viral epitopes via VirScan, a phage-display immunoprecipitation and sequencing (PhIP-Seq) technology. Here, VirScan analysis was performed on the longitudinal plasma samples from four RM treatment groups with both the RM and human virome library.

1. Loading libraries and datasets

1.1 Libraries

library(dplyr)
library(stringr)
library(tidyr)
library(ggplot2)
library(readr)
library(RColorBrewer)
library(gridExtra)
library(ggh4x)
library(ggpubr)
library(gt)

1.2 Datasets

VirScan raw datasets were first cleaned to remove control beads information, and transformed to four working datasets:

  • VARscore_1: VAR score is an overall score in antibody reactivity to each viral species in a serum sample
  • Merged_hfc_all: Total number of antibody hits to each viral peptide tile in a serum sample for all longitudinal timepoints
  • Total_peptide_hits_all: Total number of peptide tile hits that have a HFC of over 1 in each viral species for all longitudinal timepoints
  • Percentage_peptide_hits_all: TPH per number of viral peptide tiles of each viral species. Only viral species with Number_of_Peptide_Tiles >40 were included for subsequent analysis

Column explanation:

  • Treatment: Three groups of RMs were infected with WT MeV at D0, with the first group being given PBS as treatment (WTMeV), second group being given daily remdesevir infusion as post-exposure prohylasxis from D3 to D11 (RDV_PEP) and the third group being given daily remdesevir infusion late in infection from D11 to D22 (RDV_Late). The forth group was included as a sham control group (Sham_control).

  • DPI: Days post infusion (D0, D14_15, D28, D84_89, D112_168_176)

  • Macaques_ID: Identification number of each RM

# Load cleaned dataset
# VARscore_1 <- readRDS("VARscore_1.RDS") 
# Merged_hfc_all <- readRDS("Merged_hfc_all.RDS")
# Total_peptide_hits_all <- readRDS("Total_peptide_hits_all.RDS")


# Total_number_of_peptide_tiles_per_pathogen_all_table
Total_number_of_peptide_tiles_per_pathogen_all <- Merged_hfc_all %>%
  group_by(taxon_species, pep_aa, pos_start, pos_end, pep_id)%>%
  summarize()
## `summarise()` has grouped output by 'taxon_species', 'pep_aa', 'pos_start',
## 'pos_end'. You can override using the `.groups` argument.
Total_number_of_peptide_tiles_per_pathogen_all_table <- as.data.frame(table(Total_number_of_peptide_tiles_per_pathogen_all$taxon_species))%>%
  rename(taxon_species= Var1, Number_of_Peptide_Tiles = Freq)%>%
  filter(!row_number() %in% c(1))

# Percentage peptide hits (new metric)
Percentage_peptide_hits_all <- Total_peptide_hits_all %>%
  inner_join(Total_number_of_peptide_tiles_per_pathogen_all_table, by = "taxon_species") %>%
  filter(Number_of_Peptide_Tiles > 39) %>%
  mutate(pph = total_peptide_hits/Number_of_Peptide_Tiles*100)

2. Select viral species to be examined in the human and RM libraries

# Since the raw datasets combined data form the both human and macaque libraries, we need to separate them out

NHP_virus <- c("Simian foamy virus type 3", "Simian foamy virus type 1", "Macacine gammaherpesvirus 5", "Simian adenovirus B","Gammaherpesvirinae","Simian mastadenovirus G")
NHP_virus #6 NHP viral species
## [1] "Simian foamy virus type 3"   "Simian foamy virus type 1"  
## [3] "Macacine gammaherpesvirus 5" "Simian adenovirus B"        
## [5] "Gammaherpesvirinae"          "Simian mastadenovirus G"
top_viral_species_2<- VARscore_1 %>%
  dplyr :: filter (VAR >2)%>%
  arrange(-VAR)
top_viral_species_2 <- unique(top_viral_species_2$taxon_species) 

outersect <- function(x, y) {
  sort(c(setdiff(x, y),
         setdiff(y, x)))
}
top_viral_species_human_lib_2 <- outersect(top_viral_species_2,NHP_virus) 
top_viral_species_human_lib_2 #18 viral species from the human library based on VAR>2
##  [1] "BK polyomavirus"                     "Chikungunya virus"                  
##  [3] "Enterovirus A"                       "Enterovirus B"                      
##  [5] "Enterovirus C"                       "Enterovirus D"                      
##  [7] "Hepatitis B virus"                   "Hepatitis B virus subtype adr"      
##  [9] "Hepatitis delta virus"               "Hepatitis delta virus genotype I"   
## [11] "Human adenovirus D serotype 17"      "Human respiratory syncytial virus"  
## [13] "Human respiratory syncytial virus A" "Human spumaretrovirus"              
## [15] "JC polyomavirus"                     "Measles virus"                      
## [17] "Rhinovirus A"                        "Simian virus 40"
top_viral_species_10<- Percentage_peptide_hits_all %>%
  dplyr :: filter (pph >10)%>%
  arrange(-pph)
top_viral_species_10 <- unique(top_viral_species_10$taxon_species) #26 viral species has PPH>10 at any timepoint in any RM

top_viral_species_human_lib_10 <- top_viral_species_10[! top_viral_species_10 %in% c("Simian foamy virus type 3", "Simian foamy virus type 1")] 
top_viral_species_human_lib_10 #24 viral species from the human library based on PPH>10
##  [1] "Hepatitis delta virus genotype I"  "Simian virus 40"                  
##  [3] "Poliovirus type 1"                 "Hepatitis delta virus"            
##  [5] "Human adenovirus C serotype 6"     "Coxsackievirus A16"               
##  [7] "Measles virus"                     "Human enterovirus 70"             
##  [9] "Human rhinovirus 14"               "Human adenovirus 21"              
## [11] "Human respiratory syncytial virus" "Bunyavirus snowshoe hare"         
## [13] "Human adenovirus D serotype 15"    "Human adenovirus D serotype 9"    
## [15] "JC polyomavirus"                   "Coxsackievirus B6"                
## [17] "Echovirus 9"                       "Coxsackievirus A21"               
## [19] "Poliovirus type 2"                 "Coxsackievirus B2"                
## [21] "Echovirus 30"                      "BK polyomavirus"                  
## [23] "Coxsackievirus A24"                "Coxsackievirus A13"
intersect(top_viral_species_human_lib_2, top_viral_species_human_lib_10) #7 viral species overlap in both VARscore and PPH datasets
## [1] "BK polyomavirus"                   "Hepatitis delta virus"            
## [3] "Hepatitis delta virus genotype I"  "Human respiratory syncytial virus"
## [5] "JC polyomavirus"                   "Measles virus"                    
## [7] "Simian virus 40"

3. Visualizing longitudinal VAR scores

3.1 Heatmap (all longitudinal timepoints)

RDV_groups <- c("WTMeV","RDV_PEP", "RDV_Late", "Sham_control")
color_pattern_for_RDV_groups <- c("red","pink", "#6600CC", "#006699")

VARscore_per_group_heatmap_function <- function (virus_group){
  VARscore_subset_mean <- VARscore_1 %>%
    filter(taxon_species %in% virus_group)%>%
    group_by(Treatment, taxon_species, DPI)%>%
    summarise(average_VAR = mean(VAR))
  print(ggplot(VARscore_subset_mean, aes(x = DPI, y = taxon_species))+
          geom_raster(aes(fill = average_VAR))+
          scale_fill_gradient2(low="white", mid="white", high="darkblue", midpoint=0) +
          labs(x = "DPI",  y= "viral species", title = "Average VARscore for each RM group") +
          facet_wrap(~Treatment, ncol = 4, scales = "fixed") +
          theme_classic(base_size=14)+
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)))
}
VARscore_per_RM_heatmap_function <- function (datatable, virus_group, treatment_group, color_pattern){
  
  VARscore_per_RM_heatmap_graph_function <- function (datatable, virus_group, treatment_group, color_pattern) {
    
    VARscore_subset <- VARscore_1 %>%
    filter(taxon_species %in% virus_group) %>%
    filter(Treatment == treatment_group)
  
    
  print(ggplot(VARscore_subset, aes(x=Macaques_ID , y = taxon_species))+
          geom_raster(aes(fill = VAR))+
          scale_fill_gradient2(low ="white", high= color_pattern) +
          labs(x = "Monkey ID",  y= "viral species", title = paste0("Average VARscore in the ", treatment_group, " group")) +
          facet_wrap( ~DPI, ncol = 5, nrow = 3, scales = "fixed")+
          theme_classic(base_size=14)+
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)))
  
}

  VARscore_per_RM_heatmap_graph_function(datatable, virus_group, treatment_group[1], color_pattern[1])
    VARscore_per_RM_heatmap_graph_function(datatable, virus_group, treatment_group[2],color_pattern[2])
    VARscore_per_RM_heatmap_graph_function(datatable, virus_group, treatment_group[3], color_pattern[3])
    VARscore_per_RM_heatmap_graph_function(datatable, virus_group, treatment_group[4], color_pattern[4])
}
VARscore_per_group_heatmap_function(top_viral_species_human_lib_2)

VARscore_per_group_heatmap_function(NHP_virus)

VARscore_per_RM_heatmap_function(VARscore_1, top_viral_species_human_lib_2, RDV_groups, color_pattern_for_RDV_groups)

VARscore_per_RM_heatmap_function(VARscore_1, NHP_virus, RDV_groups, color_pattern_for_RDV_groups)

3.2 Lineplots/Boxplots (longitudinal timepoints)

VAR_graph_function_1 <- function (datatable) {

  ggplot(datatable, aes(x = DPI, y = VAR, group = Macaques_ID))+
    geom_line(aes(color = taxon_species), linewidth = 1, colour="lightgrey")+
    labs(x = "DPI",  y= "VAR Score") +
    facet_wrap(~Treatment, ncol = 4, scales = "fixed") +
    theme_classic(base_size=10)+
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
    stat_summary(aes(group=Treatment), fun=mean, geom="line", colour="red", linewidth = 2)
  
}
VAR_graph_function_2 <- function (datatable_0) {
  ggplot(datatable_0, aes(x=DPI, y=VAR,  color = Treatment)) + 
    geom_point() +
    theme_classic(base_size=10)+
    geom_line(aes(group = Treatment), stat = "summary" , fun = mean) +
    labs(x= "DPI" , y="Antibody reactivity (VAR scores)")+
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.2))
}
VAR_graph_function_3 <- function (datatable_00) {
  ggplot(datatable_00, aes(x=DPI, y=VAR,  color = Treatment)) + 
    geom_boxplot()+
    geom_point(position=position_jitterdodge(jitter.width = 0, jitter.height = 0))+
    facet_wrap(~Treatment, ncol =4)+
    labs(x= "DPI" , y="Antibody reactivity (VAR scores)")+
    theme_classic(base_size=10)+
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.2))+
    theme(legend.position = "none")+
    stat_compare_means(method = "kruskal.test", label.x = 1.5, label.y = -1, color="black", size = 2)
  
}
VAR_overall_function <- function (i){
  
  plot_list_1 <<- list()
  plot_list_2 <<- list()
  plot_list_3 <<- list()

  for (k in i){
    datatable_1 <- VARscore_1 %>%
      filter(taxon_species == k)
    
    plot1 <- VAR_graph_function_1(datatable_1)+ labs(title = paste0("Changes in VAR scores of \n", k))
    plot2 <- VAR_graph_function_2(datatable_1)+ labs(title = paste0("Antibody reactivity of  \n", k))
    plot3 <- VAR_graph_function_3(datatable_1)+ labs(title = paste0("Antibody reactivity of  \n", k))

    plot_list_1 [[k]] <<- plot1
    plot_list_2 [[k]] <<- plot2
    plot_list_3 [[k]] <<- plot3
  }
}
VAR_overall_function (top_viral_species_human_lib_2)
grid.arrange(grobs = plot_list_1, ncol=3, nrow = 6)

grid.arrange(grobs = plot_list_2, ncol=3, nrow = 6)

grid.arrange(grobs = plot_list_3, ncol=3, nrow = 6)

VAR_overall_function(NHP_virus)
grid.arrange(grobs = plot_list_1, ncol=3, nrow = 2)

grid.arrange(grobs = plot_list_2, ncol=3, nrow = 2)

grid.arrange(grobs = plot_list_3, ncol=3, nrow = 2)

3.3 Measles-specific comparison

# MeV VARscore head on comparision per timepoint
ggplot(subset(VARscore_1, Treatment != "Sham_control" & taxon_species == "Measles virus"), aes(x = factor (Treatment, levels = c("WTMeV", "RDV_PEP", "RDV_Late")), y = VAR, fill = Treatment)) +
  geom_boxplot()+
  scale_fill_manual(values = c("#F8766D" , "#7CAE00"  ,"#00BFC4")) +
  stat_compare_means(method = "wilcox.test", comparison = list (c("WTMeV", "RDV_PEP"), c("WTMeV", "RDV_Late"), c("RDV_PEP", "RDV_Late")), size = 4, label = "p.signif") +
  theme_minimal(base_size = 16) +    
  facet_wrap(~DPI, ncol =5)+
  labs(title = "MeV VAR scores per timepoint per group",
       x = "RM Group",
       y = "VARScores",
       fill = "RM Group")+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.2))

# MeV peptide hits head on comparision per timepoint
ggplot(subset(Total_peptide_hits_all, Treatment != "Sham_control" & taxon_species == "Measles virus"), aes(x = factor (Treatment, levels = c("WTMeV", "RDV_PEP", "RDV_Late")), y = total_peptide_hits, fill = Treatment)) +
 geom_boxplot()+
  scale_fill_manual(values = c("#F8766D" , "#7CAE00"  ,"#00BFC4")) +
  stat_compare_means(method = "wilcox.test", comparison = list (c("WTMeV", "RDV_PEP"), c("WTMeV", "RDV_Late"), c("RDV_PEP", "RDV_Late")), size = 4, label = "p.signif") +
  theme_minimal(base_size = 16) +    
  facet_wrap(~DPI, ncol =5)+
  labs(title = "MeV peptide hits per timepoint per group",
       x = "RM Group",
       y = "Total peptide hits",
       fill = "RM Group")+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.2))

4. Immune amnesia calculation using VARscores

4.1 Boxplot (first/last timepoints, normalized)

RM_id <- unique(Total_peptide_hits_all$Macaques_ID)

WTMeV_RM_id<- c("104F", "108F", "42F" , "43F", "80F", "92F")
RDV_PEP_RM_id <- c("35F",  "41F", "64E",  "79F", "83F")
RDV_Late_RM_id <- c("57F","88F", "90F")
Sham_control_RM_id <- c("117H", "177H", "83H", "84H")

color_pattern_for_RDV_groups <- c("#F8766D" , "#7CAE00"  ,"#00BFC4","#C77CFF")

df_data <- data.frame()



VAR_numeric_change_over_time_per_RM <- function (datatable, color_pattern){
  
  ggplot(datatable, aes(x =DPI, y = numeric_change, fill = Treatment))+
           geom_boxplot(fill = color_pattern)+
           geom_point(color = "grey")+
           geom_line(aes(group = taxon_species), stat = "summary" , fun = mean, color = "grey") +
           theme_classic(base_size=20)+
    ylim(-2,1.5)+
    theme(axis.text=element_text(size=20), 
          axis.title =element_text(size=20))+
           labs(x= "DPI" , y="numeric change")+
           theme(legend.position="none")
}

VAR_numeric_change_over_time_per_RM_overall_function <- function (RM_id_all, color_pattern, timepoint, timepoint_2){
  plot_list_1 <<- list()
  
  for (k in RM_id_all){
    
    top_viral_species_based_on_VAR_variable <- VARscore_1 %>%
      filter (Macaques_ID == k & VAR >1 & DPI == "d0")
    
    top_viral_species_based_on_VAR_variable <- unique(top_viral_species_based_on_VAR_variable$taxon_species)

    VAR_numeric_change_over_time_per_RM_dataset <- VARscore_1 %>%
      filter(Macaques_ID == k & taxon_species %in% top_viral_species_based_on_VAR_variable & DPI %in% c("d0", timepoint)) %>%
      filter(taxon_species != "Measles virus")%>%
      select(-total_peps)%>%
      ungroup() %>%
      pivot_wider(names_from = DPI, values_from = VAR)%>%
      mutate(Numeric_VAR_score_changes_from_D0 = {{timepoint_2}} - d0)%>%
      mutate(d0 = 0)%>%
      pivot_longer(col = c("d0", "Numeric_VAR_score_changes_from_D0"), names_to= "DPI", values_to= "numeric_change")

    plot1 <- VAR_numeric_change_over_time_per_RM(VAR_numeric_change_over_time_per_RM_dataset, color_pattern)+
      labs (title = paste0( k,  "\nAvg numeric change = ", round(mean(VAR_numeric_change_over_time_per_RM_dataset$numeric_change)*2, 1),"\n", "Total viral species = ", length(unique(VAR_numeric_change_over_time_per_RM_dataset$taxon_species))))+
      scale_x_discrete(labels = c("d0", timepoint))
    
    plot_list_1 [[k]] <<- plot1
    df_data <<- rbind(df_data, round(mean(VAR_numeric_change_over_time_per_RM_dataset$numeric_change)*2, 1))
  }
}


df_data <- data.frame()
VAR_numeric_change_over_time_per_RM_overall_function (WTMeV_RM_id, "#F8766D","d14_15", d14_15 )
grid.arrange(grobs = plot_list_1, ncol = 6)

VAR_numeric_change_over_time_per_RM_overall_function (RDV_PEP_RM_id, "#7CAE00","d14_15", d14_15)
grid.arrange(grobs = plot_list_1, ncol = 6)

VAR_numeric_change_over_time_per_RM_overall_function (RDV_Late_RM_id, "#00BFC4","d14_15", d14_15)
grid.arrange(grobs = plot_list_1, ncol = 6)

VAR_numeric_change_over_time_per_RM_overall_function (Sham_control_RM_id, "#C77CFF", "d14_15", d14_15)
grid.arrange(grobs = plot_list_1, ncol = 6)

df_data_1 <- df_data 


df_data <- data.frame()
VAR_numeric_change_over_time_per_RM_overall_function (WTMeV_RM_id, "#F8766D","d28", d28 )
grid.arrange(grobs = plot_list_1, ncol = 6)

VAR_numeric_change_over_time_per_RM_overall_function (RDV_PEP_RM_id, "#7CAE00","d28", d28)
grid.arrange(grobs = plot_list_1, ncol = 6)

VAR_numeric_change_over_time_per_RM_overall_function (RDV_Late_RM_id, "#00BFC4","d28", d28)
grid.arrange(grobs = plot_list_1, ncol = 6)

VAR_numeric_change_over_time_per_RM_overall_function (Sham_control_RM_id, "#C77CFF", "d28", d28)
grid.arrange(grobs = plot_list_1, ncol = 6)

df_data_2 <- df_data 


df_data <- data.frame()
VAR_numeric_change_over_time_per_RM_overall_function (WTMeV_RM_id[1:5], "#F8766D","d84_89", d84_89 )
grid.arrange(grobs = plot_list_1, ncol = 6)

VAR_numeric_change_over_time_per_RM_overall_function (RDV_PEP_RM_id, "#7CAE00","d84_89", d84_89)
grid.arrange(grobs = plot_list_1, ncol = 6)

VAR_numeric_change_over_time_per_RM_overall_function (RDV_Late_RM_id, "#00BFC4","d84_89", d84_89)
grid.arrange(grobs = plot_list_1, ncol = 6)

VAR_numeric_change_over_time_per_RM_overall_function (Sham_control_RM_id, "#C77CFF", "d84_89", d84_89)
grid.arrange(grobs = plot_list_1, ncol = 6)

df_data_3 <- df_data 


df_data <- data.frame()

VAR_numeric_change_over_time_per_RM_overall_function (WTMeV_RM_id, "#F8766D", "d112_168_176", d112_168_176)
grid.arrange(grobs = plot_list_1, ncol = 6)

VAR_numeric_change_over_time_per_RM_overall_function (RDV_PEP_RM_id, "#7CAE00", "d112_168_176", d112_168_176)
grid.arrange(grobs = plot_list_1, ncol = 6)

VAR_numeric_change_over_time_per_RM_overall_function (RDV_Late_RM_id, "#00BFC4", "d112_168_176", d112_168_176)
grid.arrange(grobs = plot_list_1, ncol = 6)

VAR_numeric_change_over_time_per_RM_overall_function (Sham_control_RM_id, "#C77CFF", "d112_168_176", d112_168_176)
grid.arrange(grobs = plot_list_1, ncol = 6)

df_data_4 <- df_data 
df_data_1 <- df_data_1 %>%
  mutate(group = "d0 to d14_15")%>%
  mutate(RM_ID = c("104F", "108F", "42F" , "43F", "80F", "92F", "35F",  "41F", "64E",  "79F", "83F", "57F","88F", "90F", "117H", "177H", "83H", "84H")) %>%
  mutate(RM_group = c("WTMeV", "WTMeV", "WTMeV", "WTMeV", "WTMeV", "WTMeV", "RDV_PEP", "RDV_PEP", "RDV_PEP", "RDV_PEP", "RDV_PEP", "RDV_Late", "RDV_Late", "RDV_Late", "Sham_control", "Sham_control", "Sham_control", "Sham_control"))%>%
  rename (average_VAR_numeric_change = 1)%>%
  filter(RM_ID !="177H" )

df_data_2 <- df_data_2 %>%
  mutate(group = "d0 to d28")%>%
  mutate(RM_ID = c("104F", "108F", "42F" , "43F", "80F", "92F", "35F",  "41F", "64E",  "79F", "83F", "57F","88F", "90F", "117H", "177H", "83H", "84H")) %>%
  mutate(RM_group = c("WTMeV", "WTMeV", "WTMeV", "WTMeV", "WTMeV", "WTMeV", "RDV_PEP", "RDV_PEP", "RDV_PEP", "RDV_PEP", "RDV_PEP", "RDV_Late", "RDV_Late", "RDV_Late", "Sham_control", "Sham_control", "Sham_control", "Sham_control"))%>%
  rename (average_VAR_numeric_change = 1)%>%
  filter(RM_ID !="177H" )

df_data_3 <- df_data_3 %>%
  mutate(group = "d0 to d84_89")%>%
  mutate(RM_ID = c("104F", "108F", "42F" , "43F", "80F", "35F",  "41F", "64E",  "79F", "83F",  "57F","88F", "90F", "117H", "177H", "83H", "84H")) %>%
  mutate(RM_group = c("WTMeV", "WTMeV", "WTMeV", "WTMeV", "WTMeV", "RDV_PEP", "RDV_PEP", "RDV_PEP", "RDV_PEP", "RDV_PEP", "RDV_Late", "RDV_Late", "RDV_Late", "Sham_control", "Sham_control", "Sham_control", "Sham_control"))%>%
  rename (average_VAR_numeric_change = 1)%>%
  filter(RM_ID !="177H" )

df_data_4 <- df_data_4 %>%
  mutate(group = "d0 to d112_168_176")%>%
  mutate(RM_ID = c("104F", "108F", "42F" , "43F", "80F", "92F", "35F",  "41F", "64E",  "79F", "83F",  "57F","88F", "90F", "117H", "177H", "83H", "84H")) %>%
  mutate(RM_group = c("WTMeV", "WTMeV", "WTMeV", "WTMeV", "WTMeV", "WTMeV", "RDV_PEP", "RDV_PEP", "RDV_PEP", "RDV_PEP", "RDV_PEP", "RDV_Late", "RDV_Late", "RDV_Late", "Sham_control", "Sham_control", "Sham_control", "Sham_control"))%>%
  rename (average_VAR_numeric_change = 1)%>%
  filter(RM_ID !="177H" )

WT_PEP_Late_group_comparison <- function (dataset) {

ggplot(dataset, aes(x = factor (RM_group, levels = c("WTMeV", "RDV_PEP", "RDV_Late", "Sham_control")), y = as.numeric(average_VAR_numeric_change), fill = RM_group)) +
  geom_boxplot(fill = c("#F8766D" , "#7CAE00"  ,"#00BFC4","#C77CFF")) +
  stat_compare_means(method = "wilcox.test", comparison = list (c("WTMeV", "RDV_PEP"), c("WTMeV", "RDV_Late"), c("RDV_PEP", "RDV_Late"), c("WTMeV", "Sham_control"), c("RDV_PEP", "Sham_control"), c("RDV_Late", "Sham_control")), size = 4, label = "p.signif") +
  theme_minimal(base_size = 14) +
  labs(title = paste0(unique(dataset$group)),
       x = "RM Group",
       y = "Average Numeric Change",
       fill = "RM Group") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

}

WT_PEP_Late_group_comparison(df_data_1)

WT_PEP_Late_group_comparison(df_data_2)

WT_PEP_Late_group_comparison(df_data_3)

WT_PEP_Late_group_comparison(df_data_4)

df_data_all <- rbind (df_data_1, df_data_2, df_data_3, df_data_4)
ggplot(df_data_all, aes(x = factor (group , levels = c("d0 to d14_15", "d0 to d28", "d0 to d84_89", "d0 to d112_168_176")), y = as.numeric(average_VAR_numeric_change), fill = factor (group , levels = c("d0 to d14_15", "d0 to d28", "d0 to d84_89", "d0 to d112_168_176")))) +
  geom_boxplot() +
  stat_compare_means(method = "kruskal.test", size = 3, paired = TRUE) +
  facet_wrap (~factor (RM_group, levels = c("WTMeV", "RDV_PEP", "RDV_Late", "Sham_control")))+
  theme_minimal(base_size = 16) +
  labs(x = "RM Group",
       y = "Average Numeric Change",
       fill = "RM Group")+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

ggplot(df_data_all, aes(x = factor (group , levels = c("d0 to d14_15", "d0 to d28", "d0 to d84_89", "d0 to d112_168_176")), y = as.numeric(average_VAR_numeric_change), color = factor (RM_group, levels = c("WTMeV", "RDV_PEP", "RDV_Late", "Sham_control")))) +
geom_boxplot() +
  scale_color_manual(values  = c("#F8766D" , "#7CAE00"  ,"#00BFC4","#C77CFF"))+
  stat_compare_means(method = "kruskal.test", size = 3, paired = TRUE) +
  facet_wrap (~factor (RM_group, levels = c("WTMeV", "RDV_PEP", "RDV_Late", "Sham_control")), ncol = 4)+
  theme_minimal(base_size = 14) +
  labs(x = "DPI",
       y = "Average Numeric Change",
       color = "RM Group")+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

5. Average percentage peptide hits

5.1 Heatmaps (longitudinal timepoints)

PPH_per_group_heatmap_function <- function (datatable, virus_group, color_pattern) {
  
  PPH_variable <-datatable%>% 
    filter(taxon_species %in% virus_group) %>%
    group_by(taxon_species,DPI,Treatment)%>%
    mutate(average_pph = mean(pph))
  
  
  print(ggplot(PPH_variable, aes(x=DPI , y = taxon_species))+
          geom_raster(aes(fill = average_pph))+
          scale_fill_gradient2(low ="white", high= color_pattern) +
          labs(x = "DPI",  y= "viral species", title = "Average pph for each RM group") +
          facet_wrap( ~Treatment, ncol = 5, nrow = 1, scales = "fixed")+
          theme_classic(base_size=14)+
          theme(legend.position="right")+
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)))
  
}

PPH_per_RM_heatmap_function <- function (datatable, virus_group, treatment_group, color_pattern){
  
  PPH_per_RM_graph_function <- function (datatable, virus_group, treatment_group, color_pattern) {
  PPH_variable <-datatable%>% 
    filter(taxon_species %in% virus_group) %>%
    filter(Treatment == treatment_group)
  
  print(ggplot(PPH_variable, aes(x=Macaques_ID , y = taxon_species))+
          geom_raster(aes(fill = pph))+
          scale_fill_gradient2(low ="white", high= color_pattern) +
          labs(x = "Monkey ID",  y= "viral species", title = paste0("Percentage peptide hits in the ", treatment_group, " group")) +
          facet_wrap( ~DPI, ncol = 5, nrow = 3, scales = "fixed")+
          theme_classic(base_size=14)+
          theme(legend.position="right")+
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)))
  
}

  PPH_per_RM_graph_function(datatable, virus_group, treatment_group[1], color_pattern[1])
    PPH_per_RM_graph_function(datatable, virus_group, treatment_group[2],color_pattern[2])
    PPH_per_RM_graph_function(datatable, virus_group, treatment_group[3], color_pattern[3])
    PPH_per_RM_graph_function(datatable, virus_group, treatment_group[4], color_pattern[4])
}

RDV_groups <- c("WTMeV","RDV_PEP", "RDV_Late", "Sham_control")
color_pattern_for_RDV_groups <- c("red","pink", "#6600CC", "#006699")

PPH_per_group_heatmap_function(Percentage_peptide_hits_all, top_viral_species_human_lib_10, "springgreen4")

PPH_per_group_heatmap_function(Percentage_peptide_hits_all, NHP_virus, "springgreen4")

PPH_per_RM_heatmap_function(Percentage_peptide_hits_all, top_viral_species_human_lib_10, RDV_groups, color_pattern_for_RDV_groups)

PPH_per_RM_heatmap_function(Percentage_peptide_hits_all, NHP_virus, RDV_groups, color_pattern_for_RDV_groups)

5.2 Lineplots/Boxplots (longitudinal timepoints)

PPH_graph_function <- function (datatable) {
  ggplot(datatable, aes(x=DPI, y=pph, color = Treatment)) + 
    geom_point() +
    geom_line(aes(group = Treatment), stat = "summary" , fun = mean) +
    theme_classic(base_size=12)+
    labs(x= "DPI" , y="Percentage peptide hits")+
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.2))
  
}
PPH_graph_function_2 <- function (datatable) {
  ggplot(datatable, aes(x=DPI, y=pph,  color = Treatment)) + 
    geom_boxplot()+
    geom_point(position=position_jitterdodge(jitter.width = 0, jitter.height = 0))+
    facet_wrap(~Treatment, ncol=4)+
    labs(x= "DPI" , y="Percentage peptide hits")+
    theme_classic(base_size=12)+
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=0.2))+
    stat_compare_means(method = "kruskal.test", label.x = 1.5, label.y = -2, color="black", size = 2.5)
  
}
PPH_overall_function <- function (virus_group){
  plot_list_1 <<- list()
  plot_list_2 <<- list()

  for (k in virus_group){
    
    datatable<-  Percentage_peptide_hits_all %>%
      filter(taxon_species == k)
    
    plot1 <- PPH_graph_function(datatable)+ labs(title = paste0("PPH of  \n", k))
    plot2 <- PPH_graph_function_2(datatable)+ labs(title = paste0("PPH of  \n", k))
    plot_list_1 [[k]] <<- plot1
    plot_list_2 [[k]] <<- plot2

  }
}

PPH_overall_function (top_viral_species_human_lib_10)
grid.arrange(grobs = plot_list_1, ncol = 3, nrow = 8)

grid.arrange(grobs = plot_list_2, ncol = 2, nrow = 12)

PPH_overall_function (NHP_virus)
grid.arrange(grobs = plot_list_1, ncol=3, nrow = 2)

grid.arrange(grobs = plot_list_2, ncol=2, nrow = 3)

6. Total peptide hits per protein of measles

6.1 Heatmaps (longitudinal timepoints)

RDV_groups <- c("WTMeV","RDV_PEP", "RDV_Late", "Sham_control")
color_pattern_for_RDV_groups <- c("red","pink", "#6600CC", "#006699")

Viral_peptide_hits_datatable_function <- function(datatable, virus) {
  peptide_hits <-datatable %>%
    filter(taxon_species == virus)%>%
    filter_at(vars(gene_symbol), all_vars(!is.na(.)))%>%
    mutate(hit_or_not = ifelse(hfc > 1, 1,0))%>%
    group_by(Macaques_ID, gene_symbol,DPI,Treatment)%>%
    mutate(total_peptide_hits= sum(hit_or_not)) %>%
    group_by(gene_symbol,DPI,Treatment)%>%
    mutate(average_peptide_hits = mean(total_peptide_hits))
  return (peptide_hits)
}

Average_peptide_hits_per_protein_function <- function (datatable, virus, color_pattern){
  print(ggplot(datatable, aes(x=DPI , y = gene_symbol))+
          geom_raster(aes(fill = average_peptide_hits))+
          scale_fill_gradient2(low ="white", high=color_pattern, 
                               midpoint=0) +
          labs(x = "DPI",  y= "Protein",  title = paste0(virus," epitope mapping"), fill = "Average peptide hit per protein") +
          facet_wrap( ~Treatment, ncol = 4, scales = "fixed")+
          theme_classic(base_size=15)+
          theme(legend.position="bottom")+
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)))
  
}

Peptide_hits_per_protein_function <- function (datatable, virus, treatment_group, color_pattern) {
  
  Peptide_hits_variable <-datatable %>%
    filter(Treatment==treatment_group)
  
  print(ggplot(Peptide_hits_variable, aes(x=Macaques_ID , y = gene_symbol))+
          geom_raster(aes(fill = total_peptide_hits))+
          scale_fill_gradient2(low ="white", high=color_pattern, 
                               midpoint=0) + #change to 10 or 12
          labs(x = "RM ID",  y= paste0 (virus, " proteins"),  title = paste0("Total peptide hits in ",treatment_group),  fill = "Total peptide hit \nper protein") +
          facet_wrap(Treatment ~ DPI, ncol = 5, nrow = 1, scales = "fixed")+
          theme_classic(base_size=15)+
          theme(legend.position="right")+
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)))
  
}

Peptide_hits_per_protein_function_all <- function (datatable, virus, treatment_group, color_pattern) {
  
  Peptide_hits_variable <-datatable %>%
    filter(Treatment==treatment_group)
  
  print(ggplot(Peptide_hits_variable, aes(x=Macaques_ID , y = gene_symbol))+
          geom_raster(aes(fill = total_peptide_hits))+
          scale_fill_gradient2(low ="white", high=color_pattern, 
                               midpoint=0) + #change to 10 or 12
          labs(x = "RM ID",  y= paste0 (virus, " proteins"),  title = paste0("Total peptide hits in ",treatment_group),  fill = "Total peptide hit \nper protein") +
          facet_wrap(Treatment ~ DPI, ncol = 5, nrow = 2, scales = "fixed")+
          theme_classic(base_size=12)+
          theme(legend.position="right")+
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)))
  
}

Overall_peptide_hits_per_protein_function<- function (datatable, virus, treatment_group, color_pattern){
  Peptide_hits_per_protein_function(datatable, virus, treatment_group[1], color_pattern[1])
    Peptide_hits_per_protein_function(datatable, virus, treatment_group[2],color_pattern[2])
    Peptide_hits_per_protein_function(datatable, virus, treatment_group[3], color_pattern[3])
    Peptide_hits_per_protein_function(datatable, virus, treatment_group[4], color_pattern[4])
}

Overall_peptide_hits_per_protein_function_all<- function (datatable, virus, treatment_group, color_pattern){
  Peptide_hits_per_protein_function_all(datatable, virus, treatment_group[1], color_pattern[1])
    Peptide_hits_per_protein_function_all(datatable, virus, treatment_group[2],color_pattern[2])
    Peptide_hits_per_protein_function_all(datatable, virus, treatment_group[3], color_pattern[3])
    Peptide_hits_per_protein_function_all(datatable, virus, treatment_group[4], color_pattern[4])
}

Measles_peptide_hits <- Viral_peptide_hits_datatable_function(Merged_hfc_all, "Measles virus") %>% #type in any virus of interest to visualize peptide hit per protein
  mutate (gene_symbol = if_else(gene_symbol == "P/V", "P/V/C", gene_symbol))%>% #change problematic gene_symbol naming 
  mutate (gene_symbol = if_else(gene_symbol == "P", "P/V/C", gene_symbol))

Average_peptide_hits_per_protein_function(Measles_peptide_hits, "Measles", "springgreen4")

Overall_peptide_hits_per_protein_function_all(Measles_peptide_hits,"Measles",  RDV_groups, color_pattern_for_RDV_groups)

6.2 Hit fold change per selected peptide tile in measles (longitudinal timepoints)

#RDV facet designs
design <- "
ABCDEF
GHIJK#
LMN###
OPQR##
"

Epitopes_list_and_dataset_function <- function (datatable, virus, hfc_value){
  Epitopes_list <- datatable %>%
    filter(taxon_species == virus) %>%
    # mutate (gene_symbol = if_else(gene_symbol == "P/V", "P/V/C", gene_symbol))%>% 
    # mutate (gene_symbol = if_else(gene_symbol == "P", "P/V/C", gene_symbol))%>%
    mutate(Viral_epitope_tile = paste0 (gene_symbol, pos_start, "_", pos_end )) %>%
    filter (hfc > hfc_value) 
  
  Epitopes_list <- unique(Epitopes_list$Viral_epitope_tile)
  
  Epitopes_dataset<- datatable %>%
    filter(taxon_species == virus) %>%
    mutate(Viral_epitope_tile = paste0 (gene_symbol, pos_start, "_", pos_end ))%>%
    filter(Viral_epitope_tile %in% Epitopes_list) %>%
    group_by(Viral_epitope_tile, DPI, Treatment)%>%
    mutate(averaged_hfc = mean (hfc))
  
  return (Epitopes_dataset)
}

Epitopes_per_group_graph_function <- function (datatable){
  
  ggplot(datatable, aes(x = DPI , y = Viral_epitope_tile))+
    geom_raster(aes(fill = averaged_hfc)) +
    scale_fill_gradientn(colors= c("white", "darkblue", "black")) +
    labs(x = "DPI") +
    facet_wrap(~Treatment, ncol = 4, nrow = 1, scales = "fixed") +
    theme_minimal() +
    theme(axis.text = element_text(size=10))+
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
}

Epitopes_per_RM_per_individual_group_graph_function <- function (datatable, treatment_type ){
  ggplot(subset(datatable, Treatment %in% treatment_type), aes(x = DPI , y = Viral_epitope_tile))+
    geom_raster(aes(fill = hfc)) +
    scale_fill_gradientn(colors= c("white", "darkblue", "black")) +
    labs(x = "DPI") +
    facet_wrap(~Treatment, ncol = 4, nrow = 1, scales = "fixed") +
    theme_minimal() +
    theme(axis.text = element_text(size=10))+
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
  
}

Epitopes_per_RM_per_group_graph_function <- function (datatable, treatment_type ){
  ggplot(subset(datatable, Treatment %in% treatment_type), aes(x = DPI , y = Viral_epitope_tile))+
    geom_raster(aes(fill = hfc)) +
    scale_fill_gradientn(colors= c("white", "darkblue", "black")) +
    labs(x = "DPI") +
    facet_manual(Treatment~Macaques_ID, design=design) + 
    theme_minimal() +
    theme(axis.text = element_text(size=10))+
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
  
}

Measles_epitopes_dataset <- Epitopes_list_and_dataset_function (Merged_hfc_all, "Measles virus", 2)


Epitopes_per_group_graph_function(Measles_epitopes_dataset)

Epitopes_per_RM_per_individual_group_graph_function(Measles_epitopes_dataset,"WTMeV")

Epitopes_per_RM_per_individual_group_graph_function(Measles_epitopes_dataset,"RDV_PEP")

Epitopes_per_RM_per_individual_group_graph_function(Measles_epitopes_dataset,"RDV_Late")

Epitopes_per_RM_per_individual_group_graph_function(Measles_epitopes_dataset,"Sham_control")

#Measles_epitopes_list_hfc_over_50 
Measles_epitopes_dataset_hfc_over_50 <- Epitopes_list_and_dataset_function (Merged_hfc_all, "Measles virus", 50)

Epitopes_per_group_graph_function(Measles_epitopes_dataset_hfc_over_50)

Epitopes_per_RM_per_group_graph_function(Measles_epitopes_dataset_hfc_over_50,RDV_groups)

#Measles_epitopes_dataset_filter_H 
Measles_epitopes_dataset_filter_H <- Measles_epitopes_dataset %>%
  filter(Viral_epitope_tile  != "P/V449_504" & Viral_epitope_tile  != "N477_525" & Viral_epitope_tile  != "H1_56") %>%
  filter(grepl('H', Viral_epitope_tile))

Epitopes_per_group_graph_function(Measles_epitopes_dataset_filter_H)

Epitopes_per_RM_per_group_graph_function(Measles_epitopes_dataset_filter_H,RDV_groups)

#Measles_epitopes_dataset_filter_F 
Measles_epitopes_dataset_filter_F <- Measles_epitopes_dataset %>%
  filter(Viral_epitope_tile  != "P/V449_504" & Viral_epitope_tile  != "N477_525" & Viral_epitope_tile  != "H1_56") %>%
  filter(grepl('F', Viral_epitope_tile))

Epitopes_per_group_graph_function(Measles_epitopes_dataset_filter_F)

Epitopes_per_RM_per_group_graph_function(Measles_epitopes_dataset_filter_F,RDV_groups)

6.3 Hit fold change per important peptide tile in measles (longitudinal timepoints)

Epitope_tile_hfc_function <- function (epitope_dataset, virus, gene_symbol_variable, viral_epitope_tile_variable) {
  epitope_tile_hfc_variable<- epitope_dataset %>%
    filter (gene_symbol ==gene_symbol_variable) %>%
    filter (Viral_epitope_tile ==viral_epitope_tile_variable) %>%
    filter (Duplicate != 2) %>%
    group_by(DPI, Macaques_ID, Treatment) %>%
    mutate(sum_of_hfc = sum(hfc))
  
  plot_list <<- list()
  
  plot1 <- ggplot(epitope_tile_hfc_variable, aes(x=DPI, y=sum_of_hfc, group = Macaques_ID, color = Treatment)) + 
    geom_point() +
    geom_line(linetype = "solid", color = "gray") + theme_bw() + 
    labs(x= "DPI" , y="peptide_hfc" , title = paste0("Hit-fold change of ", virus," ", gene_symbol_variable," protein\n", "peptide tile " , viral_epitope_tile_variable))+
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
    facet_wrap(~Treatment)
  
  plot2 <- ggplot(epitope_tile_hfc_variable, aes(x=DPI, y=sum_of_hfc, color = Treatment)) + 
    geom_point(position=position_jitterdodge(jitter.width = 0, jitter.height = 0))+
    geom_boxplot()+
    theme_bw() + 
    labs(x= "DPI" , y="peptide_hfc" , title = paste0("Average hit-fold change of ", virus," ", gene_symbol_variable," protein\n", "peptide tile " , viral_epitope_tile_variable))+
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
    facet_wrap(~Treatment)
  
  plot3 <- ggplot(epitope_tile_hfc_variable, aes(x=DPI, y=sum_of_hfc, group = Macaques_ID, color = Treatment)) + 
    geom_point() +
    geom_line(linetype = "solid", color = "gray") + theme_bw() + 
    labs(x= "DPI" , y="peptide_hfc" , title = paste0("Hit-fold change of ", virus," ", gene_symbol_variable," protein\n", "peptide tile " , viral_epitope_tile_variable))+
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
    facet_manual(Treatment~Macaques_ID, design=design)
  
  plot_list <<-list(plot1, plot2, plot3)
  
}

Epitope_tile_hfc_function (Measles_epitopes_dataset, "measles", "N", "N477_525")
grid.arrange(grobs = plot_list, widths = c(2, 1, 1),layout_matrix = rbind(c(1, 3, 3), c(2, 3, 3)))

Epitope_tile_hfc_function (Measles_epitopes_dataset, "measles", "H", "H1_56")
grid.arrange(grobs = plot_list, widths = c(2, 1, 1),layout_matrix = rbind(c(1, 3, 3), c(2, 3, 3)))

Epitope_tile_hfc_function (Measles_epitopes_dataset, "measles", "P/V", "P/V449_504")
grid.arrange(grobs = plot_list, widths = c(2, 1, 1),layout_matrix = rbind(c(1, 3, 3), c(2, 3, 3)))

7. Total peptide hits per protein of other viruses

# Top viral species that overlapped in both VarScore and PPH dataset
overlapping_viral_species <- intersect(top_viral_species_human_lib_2, top_viral_species_human_lib_10)
overlapping_viral_species
## [1] "BK polyomavirus"                   "Hepatitis delta virus"            
## [3] "Hepatitis delta virus genotype I"  "Human respiratory syncytial virus"
## [5] "JC polyomavirus"                   "Measles virus"                    
## [7] "Simian virus 40"
Average_peptide_hits_per_protein_of_selected_viral_species_function <- function (viral_species_selected) {
  
  hfc_dataset_of_selected_viral_species <- Viral_peptide_hits_datatable_function(Merged_hfc_all, viral_species_selected)

 Average_peptide_hits_per_protein_function(hfc_dataset_of_selected_viral_species, viral_species_selected, "springgreen4")

 
}

Average_peptide_hits_per_protein_of_selected_viral_species_function("BK polyomavirus")

Average_peptide_hits_per_protein_of_selected_viral_species_function("Hepatitis delta virus")

Average_peptide_hits_per_protein_of_selected_viral_species_function("Human respiratory syncytial virus")

Average_peptide_hits_per_protein_of_selected_viral_species_function("JC polyomavirus")

Average_peptide_hits_per_protein_of_selected_viral_species_function("Simian virus 40")

Average_peptide_hits_per_protein_of_selected_viral_species_function("Simian foamy virus type 1")

Average_peptide_hits_per_protein_of_selected_viral_species_function("Simian foamy virus type 3")